import csv
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Launch Chrome through a local chromedriver binary.
# The path is a raw string so that the Windows backslashes (``\s``, ``\c``) are
# never interpreted as escape sequences; the resulting value is unchanged.
# NOTE(review): Selenium 4 deprecates ``executable_path`` in favour of passing a
# ``Service`` object -- confirm the installed Selenium version before migrating.
driver = webdriver.Chrome(executable_path=r'D:\spider\chromedriver_win32\chromedriver.exe')

# Open the Douyu "all rooms" directory page that the rest of the script scrapes.
url = "https://www.douyu.com/directory/all"
driver.get(url)
# 保存获取到的网页的源代码
# print(driver.page_source)

# //*[@id="listAll"]/section[2]/div[2]/ul/li[2]/div/a/div[2]/div[1]/h3
# 第二个房间的标题
# xpath = '//*[@id="listAll"]/section[2]/div[2]/ul/li[2]/div/a/div[2]/div[1]/h3'
# h3 = driver.find_element(By.XPATH,xpath)
# print(h3.text)
# # 第3个房间的标题
# xpath = '//*[@id="listAll"]/section[2]/div[2]/ul/li[3]/div/a/div[2]/div[1]/h3'
# h3 = driver.find_element(By.XPATH,xpath)
# print(h3.text)
# Collect title, streamer name, category and link for room cards 1..120.
#
# Every card is addressed by its 1-based <li> position under the "listAll"
# container; the card's <a> element is the common ancestor of all fields.
ROOM_ANCHOR_XPATH = '//*[@id="listAll"]/section[2]/div[2]/ul/li[{}]/div/a'

# (dict key, XPath suffix relative to the card's <a>), in the order the values
# are looked up and printed.
ROOM_TEXT_FIELDS = (
    ("title", "/div[2]/div[1]/h3"),      # room title
    ("name", "/div[2]/div[2]/h2/div"),   # streamer name
    ("type", "/div[2]/div[1]/span"),     # category
)

# NOTE(review): no wait is performed before these lookups; if the list is
# rendered asynchronously, early lookups may fail -- confirm whether an
# explicit wait is needed here.
rooms = []
for i in range(1, 121):
    room = {}
    anchor_xpath = ROOM_ANCHOR_XPATH.format(i)
    try:
        # find_element raises when nothing matches (it never returns a falsy
        # value), so no truthiness check is needed after each lookup.
        for key, suffix in ROOM_TEXT_FIELDS:
            # Read the text once and reuse it for both storage and logging.
            room[key] = driver.find_element(By.XPATH, anchor_xpath + suffix).text
            print(room[key])

        # The room link is the href of the card's <a> element itself.
        room["number"] = driver.find_element(By.XPATH, anchor_xpath).get_attribute('href')
        print(room["number"])

        # Only reached when every lookup succeeded, so each stored room has
        # all four keys; a card with any missing element is skipped entirely.
        rooms.append(room)
    except Exception as e:
        # Best effort: report the failure and continue with the next card.
        print(e)

# Save the scraped rooms to douyu.csv, one row per room:
# title, name, type, number.
#
# csv.writer quotes fields that contain commas, quotes or line breaks, so such
# titles no longer corrupt the row layout, and a missing href (None) is
# written as an empty field instead of raising TypeError during concatenation.
# newline="" lets the csv module control line endings, as its documentation
# requires; the with-block closes the file even if a write fails.
with open("douyu.csv", encoding="utf-8", mode="w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(
        (room['title'], room['name'], room['type'], room['number'])
        for room in rooms
    )
# driver.implicitly_wait(15)
# time.sleep(3)
# 获取本页的全部的房间的li元素：120个
# 基于xpath表达式：
#  //div[@class='layout-Module-container layout-Cover ListContent']/ul/li
# xpathString = "//div[@class='layout-Module-container layout-Cover ListContent']/ul/li"
# liElements = driver.find_elements(by=By.XPATH,value=xpathString)
# time.sleep(3)
# print(len(liElements))
# # print(liElements)
# # 遍历每个房间的li
# roomsList = []
#
# for liElement in liElements:
#     time.sleep(1)
#     try:
#         room = {}
#         # 房间名称
#         roomName = liElement.find_element(By.XPATH,value=".//h3[@class='DyListCover-intro']")
#         if roomName:
#             # 取元素中的text文本
#             print(roomName.text)
#             room["name"] = roomName.text
#
#         roomsList.append(room)
#     except Exception as e :
#         print(e)

# Pause before shutting down (disabled; the call below would sleep 50 seconds)
# time.sleep(50)
# Wait a moment

# Explicitly end the driver session and release control of the browser
print("end..........")
driver.quit()



